function [ENC_Output] = ENC(Input_RSCU_Struct)
%This function calculates the effective number of codons in a gene as
%defined in:
%Wright, F. (1990). The ‘effective number of codons’ used in a
%gene. https://doi.org/10.1016/0378-1119(90)90491-9.

% The input is a single struct generated by RSCUstruct. The function takes in the
% RSCU struct for the sequence being interrogated and determines f^ (as
% defined from the ENC paper by Wright "The 'effective number of codons' used in a
%gene"), for each of the amino acids.  It then averages the f^ values for
%each amino acid groups of 2, 3, 4, or 6 isotypes (met and phe are default
%1).  Then just calculates ENC based on the formula.  Computation is only
%to calculate f^ for the given sequence.

%Note that we first sum the square of the frequencey for each codon for a
%given AA, then subtract 1, then multiply by the sum of all codons for that
%AA, then divide by the total codons for that AA - 1.

%%
%Starts off with some housekeeping tasks:

%checks to make sure the inputs are structs
if isstruct(Input_RSCU_Struct) == 0
    error('Input must be of type struct')
    return
end

%%
%This section just creates a reference struct for indexing and keeping the
%format of information associated with each codon consistent.  The
%reference struct is called AAs.  There is also the variable "codons",
%which provides the length of iterations to get through all the codons, and
%names, which provides a vector of all the names of each AA.

%Assigns codons to each amino acid for use in struct building
ala = ["GCA"; "GCC"; "GCG"; "GCT"];
arg = ["AGA"; "AGG"; "CGA"; "CGC"; "CGG"; "CGT"];
asn = ["AAC"; "AAT"];
asp = ["GAC"; "GAT"];
cys = ["TGC"; "TGT"];
gln = ["CAA"; "CAG"];
glu = ["GAA"; "GAG"];
gly = ["GGA"; "GGC"; "GGG"; "GGT"];
his = ["CAC"; "CAT"];
ile = ["ATA"; "ATC"; "ATT"];
leu = ["CTA"; "CTC"; "CTG"; "CTT"; "TTA"; "TTG"];
lys = ["AAA"; "AAG"];
met = ["ATG"];
phe = ["TTC"; "TTT"];
pro = ["CCA"; "CCC"; "CCG"; "CCT"];
ser = ["AGC"; "AGT"; "TCA"; "TCC"; "TCG"; "TCT"];
thr = ["ACA"; "ACC"; "ACG"; "ACT"];
trp = ["TGG"];
tyr = ["TAC"; "TAT"];
val = ["GTA"; "GTC"; "GTG"; "GTT"];

%builds a struct with each amino acid and each codon for calculating RSCU
%and having appropriate indexing for a given amino acid and the codons
%assigned to it.
AAs.ala = ala;
AAs.arg = arg;
AAs.asn = asn;
AAs.asp = asp;
AAs.cys = cys;
AAs.gln = gln;
AAs.glu = glu;
AAs.gly = gly;
AAs.his = his;
AAs.ile = ile;
AAs.leu = leu;
AAs.lys = lys;
AAs.met = met;
AAs.phe = phe;
AAs.pro = pro;
AAs.ser = ser;
AAs.thr = thr;
AAs.trp = trp;
AAs.tyr = tyr;
AAs.val = val;

%Gets a vector of names for indexing the AAs struct.
names = fieldnames(AAs);
%Makes another vector for accessing frequency data since AAs are
%capitalized.
names2 = {'Ala'; 'Arg'; 'Asn'; 'Asp'; 'Cys'; 'Gln'; 'Glu'; 'Gly'; 'His'; 'Ile'; 'Leu'; 'Lys'; 'Met'; 'Phe'; 'Pro'; 'Ser'; 'Thr'; 'Trp'; 'Tyr'; 'Val'};

%%
%Now calculating ENC.  Have to loop through the input struct and calculate
%f^ for each different amino acid group. will have a vector for each fhat
%group and their own counters.
two_group_Fhat_count = 1;
two_group_Fhat = [];
three_group_Fhat_count = 1;
three_group_Fhat = [];
four_group_Fhat_count = 1;
four_group_Fhat = [];
six_group_Fhat_count = 1;
six_group_Fhat = [];
for i = 1:length(names) %traverse each named field
    currentAA = char(names(i)); %gets the ith amino acid
    currentAACap = char(names2(i));%gets the ith amino acid capitalized
    currntAA_len = length(AAs.(currentAA)); %this will allow categorization of each type of isotype number group (1, 2, 3, 4, or 6)
    if currntAA_len == 1
        continue
    else
        curr_frequency_vec = []; %will initialize for each amino acid and collect the frequencies
        curr_frequency_vec_count = 1; %counter resets each time
        for j = 1:currntAA_len %indexes the number of codons for a given AA to increment the appropriate # of times.
            % now need to calculate the f hat for each amino acid.
            current_codon = AAs.(currentAA)(j);%gets the jth codon for the ith amino acid
            current_codon_freq = Input_RSCU_Struct.codon_freq.(currentAACap).Freq(j);%gets the current codon frequency
            current_codon_freq = current_codon_freq^2; %square for ENC calculation
            curr_frequency_vec(curr_frequency_vec_count) = current_codon_freq; %add to growing list of the ith AAs Freq values
            curr_frequency_vec_count = curr_frequency_vec_count+1;
        end
        %now need to add the squared frequencies together for ENC count.
        curr_frequency_vec_sum = sum(curr_frequency_vec);
        %now multiply by the number of codons in the current sequence for
        %the given amino acid.
        codons = Input_RSCU_Struct.AA_codoncount.(currentAA); %number of codons for the given amino acid. 
        curr_frequency_vec_sumXcodons = curr_frequency_vec_sum*codons;
        %now subtract 1 and divide by number of codons - 1 to get the
        %current AA f^ value.  then add it to the correct group and
        %increment that groups counter.
        current_fhat = (curr_frequency_vec_sumXcodons-1)./(codons-1);
        if currntAA_len == 2
            two_group_Fhat(two_group_Fhat_count) = current_fhat;
            two_group_Fhat_count = two_group_Fhat_count + 1;
        elseif currntAA_len == 3
            three_group_Fhat(three_group_Fhat_count) = current_fhat;
            three_group_Fhat_count = three_group_Fhat_count + 1;
        elseif currntAA_len == 4
            four_group_Fhat(four_group_Fhat_count) = current_fhat;
            four_group_Fhat_count = four_group_Fhat_count + 1;
        elseif currntAA_len == 6
            six_group_Fhat(six_group_Fhat_count) = current_fhat;
            six_group_Fhat_count = six_group_Fhat_count + 1;
        end
    end
end

%Now that we have a vector of fhat values for each AA subgroup based on
%number of synonymous codons, we just have to average them then calculate
%the ENC.
two_group_Fhat_ave = mean(two_group_Fhat);
three_group_Fhat_ave = mean(three_group_Fhat);
four_group_Fhat_ave = mean(four_group_Fhat);
six_group_Fhat_ave = mean(six_group_Fhat);

ENC_Output = 2 + (9./two_group_Fhat_ave) + (1./three_group_Fhat_ave) + (5./four_group_Fhat_ave) + (3./six_group_Fhat_ave);
end